Using real data. For more information on how these data were obtained, go to: http://panthera:8888/notebooks/external_plugins/paper3code/%5BD.E%5DCell%20Extractor.ipynb

import pandas as pd
import numpy as np
import pystan
import statsmodels.api as stmod
import scipy as sp
%matplotlib inline
import sys
sys.path.append('/apps')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
## Use the ggplot style
plt.style.use('ggplot')
## Now, let's use pystan
import geopandas as gpd
import pystan
import patsy
#import utilities.data_extraction as de
## Use the ggplot style
plt.style.use('ggplot')
import geoviews as gv
import geoviews.feature as gf
from cartopy import crs
#gv.extension('bokeh','matplotlib')
gv.extension('bokeh')
plt.rcParams['figure.figsize'] = [10, 10]
Change the working directory so that the folder 'data' is a direct child of it (the notebook reads files through relative paths such as 'data/...').
cd ../external_plugins/JSDM4POD/
[Errno 2] No such file or directory: '../external_plugins/JSDM4POD/' /main/app/external_plugins/JSDM4POD
## Geometry file: the cell grid for Puebla (multi-species sample, partition 9).
datafile = "data/multispecies_data_puebla_p9_geometry.json"
## Read the file.
## NOTE(review): `na_values` is not a documented geopandas.read_file parameter;
## it is only forwarded to the underlying I/O engine. The explicit replace()
## below is what actually converts the 'N.A.' placeholder to NaN — confirm the
## kwarg is harmless and remove it if so.
data_w_islands = gpd.read_file(datafile,na_values='N.A.')
data_w_islands = data_w_islands.replace('N.A.', np.nan)
## Index rows by (cell id, level); `level` presumably encodes the taxon
## index 1..6 used later — TODO confirm against the extractor notebook.
data_w_islands.set_index(['cell_ids','level'],inplace=True)
#gg.plot(edgecolor='black')
## Quick visual check: plot the level-1 slice of the grid with black borders.
data_w_islands.xs(1,level=1).plot(edgecolor='black')
<AxesSubplot:>
## Read the adjacency matrix of the cell grid.
## This was obtained from biospytial.
## In Biospytial the map above and this matrix are attributes of the same class.
mat_filename = "data/training_data_sample_puebla_p9_abies_pinophyta_adjmat.npy"
## M[i, j] is presumably 1 when cells i and j are neighbours, 0 otherwise —
## the row-sum/degree computation below relies on that; confirm in Biospytial.
M = np.load(file=mat_filename).astype('float')
## (The graph neighbour list used to be read from
## /outputs/training_data_sample_puebla_p9_graph_neighbourlist.csv; the
## adjacency matrix M loaded above supersedes it.)
## Number of neighbours of each polygon: row sums of the adjacency matrix.
D = M.sum(axis=1).astype('float')
## Polygons with zero neighbours ("islands") must be removed. Keeping them
## would break the positive definiteness of the CAR covariance matrix.
idx_islands = np.where(D == 0)
## Degree vector without the islands.
nD = np.delete(D, idx_islands, axis=0)
## Adjacency matrix without the island rows and columns.
NM = np.delete(np.delete(M, idx_islands, axis=0), idx_islands, axis=1)
## Diagonal matrix of neighbour counts (islands removed).
ND = np.diag(nD)
## Convert the matrix row indices of the islands into cell-id labels.
## `idx_islands` is the tuple returned by np.where, so use its first element.
island_cell_ids = data_w_islands.index.levels[0][idx_islands[0]]
## Kept for backward compatibility with the original single-island code.
island_cell_id = island_cell_ids[0]
## Drop ALL island cells — the original dropped only the first one, which is
## a bug whenever more than one polygon has zero neighbours. Passing `level`
## also avoids pandas' non-lexsorted multi-index PerformanceWarning.
data = data_w_islands.drop(island_cell_ids, level='cell_ids')
/opt/conda/envs/biospytial3/lib/python3.8/site-packages/pandas/core/generic.py:4153: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance. obj = obj._drop_axis(labels, axis, level=level, errors=errors)
## Select the covariate columns (positions 2:12) and cast them to float.
data_d = data[data.columns[2:12]].astype(float)
## Impute missing values with the mean of the corresponding column.
## NOTE(review): mean imputation is a stop-gap; a proper missing-data model
## would be preferable in production.
means = data_d.mean()
## Single vectorised fillna replaces the original per-column
## `replace(np.nan, ..., inplace=True)` loop (same result, no
## chained-assignment hazard).
data_d = data_d.fillna(means)
data_d.columns
Index(['Dist.to.road_m', 'Elevation_m', 'MaxTemp_m', 'MeanTemp_m', 'MinTemp_m',
'Population_m', 'Precipitation_m', 'SolarRadiation_m', 'VaporPres_m',
'WindSp_m'],
dtype='object')
data_d.columns[1:]
Index(['Elevation_m', 'MaxTemp_m', 'MeanTemp_m', 'MinTemp_m', 'Population_m',
'Precipitation_m', 'SolarRadiation_m', 'VaporPres_m', 'WindSp_m'],
dtype='object')
## Rename the first covariate to a formula-friendly identifier.
data_d.columns = ['distance_to_road'] + list(data_d.columns[1:])
## Add an intercept to each process (each keeps its own Intercept column).
#### This is a workaround and should be avoided in production.
formula = '~ Elevation_m + Precipitation_m + distance_to_road + Population_m'
## Build the design matrix ONCE (the original computed the identical
## dmatrix twice); each process then keeps a disjoint subset of columns.
design = patsy.dmatrix(formula,data=data_d,return_type='dataframe',NA_action="drop")
X1 = design.drop(['distance_to_road','Population_m'],axis=1)   # ecological process
X2 = design.drop(['Elevation_m','Precipitation_m'],axis=1)     # sampling process
## Side-by-side: [Intercept, eco covariates, Intercept, sampling covariates].
X = pd.concat([X1,X2],axis=1)
import models
## `reload` is not a builtin in Python 3; import it explicitly so this cell
## also works in a fresh kernel (it previously relied on `reload` already
## being in scope, which raises NameError otherwise).
from importlib import reload
reload(models)
<module 'external_plugins.paper3code.models' from '/apps/external_plugins/paper3code/models.py'>
## Load the Stan model code and compile it.
## NOTE: the name `multispecies_model` is reused — first it holds the model
## code (a string), then the compiled pystan.StanModel object.
multispecies_model = models.multispeciesCARModel_stationaryBernoulli()
## Compilation takes on the order of a minute (see the wall time in the log).
%time multispecies_model = pystan.StanModel(model_code=multispecies_model)
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_5ea4edceb6f41caad64880d7a1a99598 NOW.
CPU times: user 1.41 s, sys: 71.6 ms, total: 1.48 s Wall time: 1min 18s
## Level index (1..MaxLevels) of every row; used by Stan's 1-based indexing.
levels = data.index.get_level_values(1)
## Number of modelled levels/taxa (5 taxa + the complement).
MaxLevels = 6
## Each undirected edge appears twice in the adjacency matrix, hence /2.
## NOTE(review): this differs from how N_edges was defined elsewhere — verify
## it matches the Stan model's expectation.
N_edges = int(np.sum(NM)/2.0)
N = X.shape[0]
K = X.shape[1]  # NOTE(review): K is not used below — candidate for removal.
J = MaxLevels
adjacency_matrix = NM # Removed islands from M
## Number of covariates per process (excluding intercepts).
## NOTE(review): hard-coded; should be checked against the widths of X1/X2
## before sampling.
n_eco_covs = 3
n_samp_covs = 3
data_multilevel_CAR = {'N' : N,
'N_ecological_covariates' : n_eco_covs,
'N_sample_covariates' : n_samp_covs,
'J': J,
'N_edges' : N_edges,
'N_areas' : int(X.shape[0]/J),
'level': levels, # Stan counts starting at 1
'W' : adjacency_matrix,
#'y': simulation['logit_p_sim'].values,
#'y': simulation['q'].values,
'y': data.Y.values.astype('int'),
#'y': yy.simulated.values.flatten().astype('float'),
'x': X,
}
data.groupby(by='taxon').first()
| Y | Dist.to.road_m | Elevation_m | MaxTemp_m | MeanTemp_m | MinTemp_m | Population_m | Precipitation_m | SolarRadiation_m | VaporPres_m | WindSp_m | Longitude | Latitude | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| taxon | ||||||||||||||
| Complement | 1.0 | 1972.682830810547 | 3 | 28.46611108779907 | 24.25555555555556 | 20.05999995337592 | 37.54676008224487 | 127.4388888888889 | 17104.49444444444 | 2.553833331664403 | 3.151111125946045 | -96.977411 | 20.432293 | MULTIPOLYGON (((-96.99006 20.46289, -96.98497 ... |
| Leucaena | 0.0 | 1972.682830810547 | 3 | 28.46611108779907 | 24.25555555555556 | 20.05999995337592 | 37.54676008224487 | 127.4388888888889 | 17104.49444444444 | 2.553833331664403 | 3.151111125946045 | -96.977411 | 20.432293 | MULTIPOLYGON (((-96.99006 20.46289, -96.98497 ... |
| Phyllostomidae | 0.0 | 1972.682830810547 | 3 | 28.46611108779907 | 24.25555555555556 | 20.05999995337592 | 37.54676008224487 | 127.4388888888889 | 17104.49444444444 | 2.553833331664403 | 3.151111125946045 | -96.977411 | 20.432293 | MULTIPOLYGON (((-96.99006 20.46289, -96.98497 ... |
| Picidae | 1.0 | 1972.682830810547 | 3 | 28.46611108779907 | 24.25555555555556 | 20.05999995337592 | 37.54676008224487 | 127.4388888888889 | 17104.49444444444 | 2.553833331664403 | 3.151111125946045 | -96.977411 | 20.432293 | MULTIPOLYGON (((-96.99006 20.46289, -96.98497 ... |
| Pinaceae | 0.0 | 1972.682830810547 | 3 | 28.46611108779907 | 24.25555555555556 | 20.05999995337592 | 37.54676008224487 | 127.4388888888889 | 17104.49444444444 | 2.553833331664403 | 3.151111125946045 | -96.977411 | 20.432293 | MULTIPOLYGON (((-96.99006 20.46289, -96.98497 ... |
| Quercus | 0.0 | 1972.682830810547 | 3 | 28.46611108779907 | 24.25555555555556 | 20.05999995337592 | 37.54676008224487 | 127.4388888888889 | 17104.49444444444 | 2.553833331664403 | 3.151111125946045 | -96.977411 | 20.432293 | MULTIPOLYGON (((-96.99006 20.46289, -96.98497 ... |
## Draw the posterior sample: 4 chains x 300 iterations (150 warmup each).
## NOTE(review): the log shows 100% of iterations saturated max_treedepth=10;
## rerun with control={'adapt_delta':0.81,'max_treedepth':15} as hinted by the
## trailing comment.
%time fit3 = multispecies_model.sampling(data=data_multilevel_CAR, iter=300,chains=4, control={'adapt_delta':0.81}) #,'max_treedepth':15})
#tuto = fit3.extract(permuted=True)
#tuto.keys()
WARNING:pystan:Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat. To run all diagnostics call pystan.check_hmc_diagnostics(fit) WARNING:pystan:600 of 600 iterations saturated the maximum tree depth of 10 (100 %) WARNING:pystan:Run again with max_treedepth larger than 10 to avoid saturation
CPU times: user 1min 24s, sys: 1.88 s, total: 1min 26s Wall time: 57min 39s
## Posterior summaries for the main parameter groups.
for par in ('beta', 'alpha', 'alpha_car', 'tau'):
    print(fit3.stansummary(pars=par))
Inference for Stan model: anon_model_5ea4edceb6f41caad64880d7a1a99598.
4 chains, each with iter=300; warmup=150; thin=1;
post-warmup draws per chain=150, total post-warmup draws=600.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
beta[1,1] -0.57 1.03 1.46 -1.96 -1.7 -1.11 0.67 1.89 2 158.03
beta[2,1] -0.37 0.73 1.04 -1.51 -1.14 -0.65 0.47 1.36 2 65.45
beta[3,1] 0.82 0.41 0.58 -0.07 0.29 0.92 1.34 1.53 2 50.6
beta[4,1] -0.57 0.5 0.71 -1.57 -1.25 -0.44 0.12 0.16 2 38.18
beta[5,1] -0.78 1.12 1.58 -2.03 -1.91 -1.52 0.52 1.93 2 120.23
beta[6,1] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[1,2] -0.01 7.9e-3 0.01 -0.03 -0.02-7.6e-3-2.1e-3-1.9e-3 2 15.79
beta[2,2] -0.04 0.04 0.06 -0.15 -0.07 -0.01-4.1e-3-1.7e-3 2 12.6
beta[3,2] -0.02 0.02 0.02 -0.06 -0.03-5.2e-3-2.8e-3-1.2e-3 2 16.77
beta[4,2]-6.3e-3 5.9e-3 8.5e-3 -0.02-9.9e-3-2.1e-3-8.0e-4-3.0e-4 2 9.26
beta[5,2]-5.6e-3 3.5e-3 5.0e-3 -0.01 -0.01-4.1e-3-7.4e-4-5.3e-5 2 9.48
beta[6,2] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[1,3] -0.23 0.15 0.22 -0.59 -0.42 -0.15 -0.04 -0.04 2 26.84
beta[2,3] -0.63 0.63 0.9 -2.32 -1.15 -0.15 -0.06 -0.02 2 25.13
beta[3,3] -0.34 0.33 0.46 -1.21 -0.6 -0.1 -0.06 -0.02 2 17.07
beta[4,3] -0.41 0.36 0.52 -1.39 -0.7 -0.14 -0.07 -0.04 2 16.29
beta[5,3] -0.29 0.17 0.24 -0.67 -0.51 -0.23 -0.06 -0.02 2 15.23
beta[6,3] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[1,4] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[2,4] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[3,4] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[4,4] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[5,4] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[6,4] 1.44 0.14 0.24 0.85 1.34 1.41 1.64 1.82 3 3.07
beta[1,5] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[2,5] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[3,5] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[4,5] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[5,5] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[6,5]-6.7e-5 1.5e-5 3.0e-5-1.2e-4-8.8e-5-6.4e-5-5.2e-5 2.8e-6 4 2.24
beta[1,6] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[2,6] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[3,6] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[4,6] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[5,6] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
beta[6,6] 2.3e-4 3.2e-5 4.9e-5 1.5e-4 2.0e-4 2.3e-4 2.6e-4 3.3e-4 2 2.4
Samples were drawn using NUTS at Thu Jul 15 03:31:36 2021.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
Inference for Stan model: anon_model_5ea4edceb6f41caad64880d7a1a99598.
4 chains, each with iter=300; warmup=150; thin=1;
post-warmup draws per chain=150, total post-warmup draws=600.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
alpha[1,1] 0.39 0.22 0.31 0.05 0.08 0.35 0.71 0.78 2 92.92
alpha[2,1] 0.2 0.14 0.19 9.3e-3 0.05 0.14 0.36 0.54 2 73.05
alpha[3,1] 0.28 0.17 0.24 0.02 0.11 0.23 0.47 0.67 2 82.47
alpha[4,1] 0.28 0.14 0.19 0.03 0.1 0.25 0.45 0.57 2 48.15
alpha[5,1] 0.33 0.21 0.3 0.06 0.08 0.23 0.6 0.81 2 98.14
alpha[6,1] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
alpha[1,2] 0.61 0.22 0.31 0.22 0.29 0.65 0.92 0.95 2 92.92
alpha[2,2] 0.8 0.14 0.19 0.46 0.64 0.86 0.95 0.99 2 73.05
alpha[3,2] 0.72 0.17 0.24 0.33 0.53 0.77 0.89 0.98 2 82.47
alpha[4,2] 0.72 0.14 0.19 0.43 0.55 0.75 0.9 0.97 2 48.15
alpha[5,2] 0.67 0.21 0.3 0.19 0.4 0.77 0.92 0.94 2 98.14
alpha[6,2] 1.0 nan 0.0 1.0 1.0 1.0 1.0 1.0 nan nan
Samples were drawn using NUTS at Thu Jul 15 03:31:36 2021.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
Inference for Stan model: anon_model_5ea4edceb6f41caad64880d7a1a99598.
4 chains, each with iter=300; warmup=150; thin=1;
post-warmup draws per chain=150, total post-warmup draws=600.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
alpha_car 0.54 0.04 0.06 0.45 0.49 0.54 0.61 0.63 2 10.47
Samples were drawn using NUTS at Thu Jul 15 03:31:36 2021.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
Inference for Stan model: anon_model_5ea4edceb6f41caad64880d7a1a99598.
4 chains, each with iter=300; warmup=150; thin=1;
post-warmup draws per chain=150, total post-warmup draws=600.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
tau 0.19 1.9e-3 4.6e-3 0.18 0.19 0.19 0.19 0.2 6 1.28
Samples were drawn using NUTS at Thu Jul 15 03:31:36 2021.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
## Continue sampling after the burn-in run, reusing the tuned sampler state
## from fit3 instead of re-adapting from scratch.
stepsize = fit3.get_stepsize()
# by default .get_inv_metric returns a list; as_dict keys it per chain
inv_metric = fit3.get_inv_metric(as_dict=True)
## Last position of each chain — used as the initial values of the new run.
init = fit3.get_last_position()
# increment seed by 1
#seed2 = seed + 1
## adapt_engaged=False keeps the warmed-up stepsize/metric fixed.
control = {"stepsize" : stepsize,
"inv_metric" : inv_metric,
"adapt_engaged" : False
}
%time fit_continue = multispecies_model.sampling(data=data_multilevel_CAR,iter=100,chains=4,control=control,init=init)#seed=seed,) #,'max_treedepth':15})
WARNING:pystan:Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat. To run all diagnostics call pystan.check_hmc_diagnostics(fit) WARNING:pystan:200 of 200 iterations saturated the maximum tree depth of 10 (100 %) WARNING:pystan:Run again with max_treedepth larger than 10 to avoid saturation
CPU times: user 1min, sys: 3.14 s, total: 1min 3s Wall time: 39min 35s
## Drop the point coordinates; they are not needed for the polygon maps.
data_dd = data.drop(['Latitude', 'Longitude'],axis=1)
## Extract the latent posterior fields from the fit:
## P (ecological suitability), S (sampling effort), G and Q (mixed fields).
results = fit3.extract(pars=['P','S','G','Q'])
#results = fit_continue.extract(pars=['P','S','G','Q'])
import scipy.special as sp  # NOTE: shadows `import scipy as sp` from the file top
from holoviews import opts
## One row per observation, one column per posterior draw.
P = pd.DataFrame(results['P'].T)
S = pd.DataFrame(results['S'].T)
G = pd.DataFrame(results['G'].T)
Q = pd.DataFrame(results['Q'].T)
## Posterior quantile used as the point summary (0.5 = median).
## The original defined `quant` but then hard-coded 0.5; use the variable.
quant = 0.5
P_median = P.quantile(q=quant,axis=1)
Q_median = Q.quantile(q=quant,axis=1)
G_median = G.quantile(q=quant,axis=1)
S_median = S.quantile(q=quant,axis=1)
## Visual sanity check: shape of the derivative of the logistic (expit).
x = np.linspace(-15,15,100)
plt.plot(x[1:],np.diff(sp.expit(x)))
[<matplotlib.lines.Line2D at 0x7feded4ab520>]
medians_levels = pd.concat([P_median,Q_median],axis=1)
medians_s = pd.concat([G_median,S_median],axis=1)
medians_levels.index = data.index
medians_levels.columns = ['Pm','Qm']
medians_s.index = data.xs(1,level=1).index
medians_s.columns = ['Gm','Sm']
new_l = gpd.GeoDataFrame(pd.concat([medians_levels,data_dd],axis=1))
new_s = gpd.GeoDataFrame(pd.concat([medians_s,data_dd.xs(1,level=1)],axis=1))
%time taxa_df = list(map(lambda i : data.xs(i,level=1)[data.Y.xs(i,level=1) > 0],range(1,7) ))
%time taxa_results = list(map(lambda i : new_l.xs(i,level=1),range(1,7)))
CPU times: user 170 ms, sys: 32 µs, total: 170 ms Wall time: 169 ms CPU times: user 74.6 ms, sys: 0 ns, total: 74.6 ms Wall time: 74.5 ms
## Taxon names in level order, read from one arbitrary cell (238478) that has
## a row for every taxon.
tnames = list(data_dd['taxon'].xs(238478))
## Plotting cell for taxon index i = 0.
## NOTE(review): this cell is duplicated below for i = 1..5 — a candidate for
## a single plotting helper function.
i = 0
group_name = tnames[i]
label_p = "Ecological Suitability for "
label_q = "Mixed latent Q(x) for "
## taxa_df[5] is the 'Complement' group: every sampled cell (the background).
complement_df = taxa_df[5]
presences_df = taxa_df[i]
#pines_pt = gTDF[gTDF.Abies > 0]
#complement_pt = taxa_df[1]
#### Change accordingly
width = 600
height = 650
## Red dots: all sampled cells.
sample_pt = gv.Points((complement_df.Longitude,complement_df.Latitude),group=group_name,label='Sample').opts(
fill_color = 'red',
line_color = 'grey',
line_width = 0.2,
line_alpha = 0.9,
fill_alpha = 0.6,
size = 2,
)
## Black crosses: observed presences of the selected taxon.
presence_pt = gv.Points((presences_df.Longitude,presences_df.Latitude),group=group_name,label='Presence').opts(
marker = 'x',
color = 'black')
obs = (sample_pt * presence_pt)
cmap = plt.cm.viridis
#cmap = plt.cm.seismic
## Choropleths of the posterior medians Pm (suitability) and Qm (mixing).
P = gv.Polygons(taxa_results[i],vdims=['Pm','Qm'],group=group_name,label="Ecological Suitability P(x)")
Q = gv.Polygons(taxa_results[i],vdims=['Qm','Pm'],group=group_name,label="Mixing effect Q(x)")
latent_pres_fig = ((P * obs).relabel(label_p) + (Q * obs).relabel(label_q))
#latent_pres_fig = ((P).relabel(lm) + (Q ).relabel(lm))
#clim = (-4,4)
#clim = (0,1)
latent_pres_fig.opts(
opts.Polygons(
line_width=0.001,
line_alpha = 0.2,
tools=['hover'],
width = width,
height = height,
colorbar = True,
#clim = clim,
colorbar_position = 'bottom',
alpha = 1.0,
cmap = cmap,),
#merge_tools=False
)
## Same plotting cell as above, for taxon index i = 1 (see the i = 0 cell for
## detailed comments; candidate for a shared helper function).
i = 1
group_name = tnames[i]
label_p = "Ecological Suitability for "
label_q = "Mixed latent Q(x) for "
complement_df = taxa_df[5]
presences_df = taxa_df[i]
#pines_pt = gTDF[gTDF.Abies > 0]
#complement_pt = taxa_df[1]
#### Change accordingly
width = 600
height = 650
sample_pt = gv.Points((complement_df.Longitude,complement_df.Latitude),group=group_name,label='Sample').opts(
fill_color = 'red',
line_color = 'grey',
line_width = 0.2,
line_alpha = 0.9,
fill_alpha = 0.6,
size = 2,
)
presence_pt = gv.Points((presences_df.Longitude,presences_df.Latitude),group=group_name,label='Presence').opts(
marker = 'x',
color = 'black')
obs = (sample_pt * presence_pt)
cmap = plt.cm.viridis
P = gv.Polygons(taxa_results[i],vdims=['Pm','Qm'],group=group_name,label="Ecological Suitability P(x)")
Q = gv.Polygons(taxa_results[i],vdims=['Qm','Pm'],group=group_name,label="Mixing effect Q(x)")
latent_pres_fig = ((P * obs).relabel(label_p) + (Q * obs).relabel(label_q))
#latent_pres_fig = ((P).relabel(lm) + (Q ).relabel(lm))
#clim = (-4,4)
latent_pres_fig.opts(
opts.Polygons(
line_width=0.001,
line_alpha = 0.2,
tools=['hover'],
width = width,
height = height,
colorbar = True,
#clim = clim,
colorbar_position = 'bottom',
alpha = 1.0,
cmap = cmap,),
#merge_tools=False
)
## Same plotting cell as the i = 0 one, for taxon index i = 2
## (candidate for a shared helper function).
i = 2
group_name = tnames[i]
label_p = "Ecological Suitability for "
label_q = "Mixed latent Q(x) for "
complement_df = taxa_df[5]
presences_df = taxa_df[i]
#pines_pt = gTDF[gTDF.Abies > 0]
#complement_pt = taxa_df[1]
#### Change accordingly
width = 600
height = 650
sample_pt = gv.Points((complement_df.Longitude,complement_df.Latitude),group=group_name,label='Sample').opts(
fill_color = 'red',
line_color = 'grey',
line_width = 0.2,
line_alpha = 0.9,
fill_alpha = 0.6,
size = 2,
)
presence_pt = gv.Points((presences_df.Longitude,presences_df.Latitude),group=group_name,label='Presence').opts(
marker = 'x',
color = 'black')
obs = (sample_pt * presence_pt)
cmap = plt.cm.viridis
P = gv.Polygons(taxa_results[i],vdims=['Pm','Qm'],group=group_name,label="Ecological Suitability P(x)")
Q = gv.Polygons(taxa_results[i],vdims=['Qm','Pm'],group=group_name,label="Mixing effect Q(x)")
latent_pres_fig = ((P * obs).relabel(label_p) + (Q * obs).relabel(label_q))
#latent_pres_fig = ((P).relabel(lm) + (Q ).relabel(lm))
#clim = (-4,4)
latent_pres_fig.opts(
opts.Polygons(
line_width=0.001,
line_alpha = 0.2,
tools=['hover'],
width = width,
height = height,
colorbar = True,
#clim = clim,
colorbar_position = 'bottom',
alpha = 1.0,
cmap = cmap,),
#merge_tools=False
)
## Same plotting cell as the i = 0 one, for taxon index i = 3
## (candidate for a shared helper function).
i = 3
group_name = tnames[i]
label_p = "Ecological Suitability for "
label_q = "Mixed latent Q(x) for "
complement_df = taxa_df[5]
presences_df = taxa_df[i]
#pines_pt = gTDF[gTDF.Abies > 0]
#complement_pt = taxa_df[1]
#### Change accordingly
width = 600
height = 650
sample_pt = gv.Points((complement_df.Longitude,complement_df.Latitude),group=group_name,label='Sample').opts(
fill_color = 'red',
line_color = 'grey',
line_width = 0.2,
line_alpha = 0.9,
fill_alpha = 0.6,
size = 2,
)
presence_pt = gv.Points((presences_df.Longitude,presences_df.Latitude),group=group_name,label='Presence').opts(
marker = 'x',
color = 'black')
obs = (sample_pt * presence_pt)
cmap = plt.cm.viridis
P = gv.Polygons(taxa_results[i],vdims=['Pm','Qm'],group=group_name,label="Ecological Suitability P(x)")
Q = gv.Polygons(taxa_results[i],vdims=['Qm','Pm'],group=group_name,label="Mixing effect Q(x)")
latent_pres_fig = ((P * obs).relabel(label_p) + (Q * obs).relabel(label_q))
#latent_pres_fig = ((P).relabel(lm) + (Q ).relabel(lm))
#clim = (-4,4)
latent_pres_fig.opts(
opts.Polygons(
line_width=0.001,
line_alpha = 0.2,
tools=['hover'],
width = width,
height = height,
colorbar = True,
#clim = clim,
colorbar_position = 'bottom',
alpha = 1.0,
cmap = cmap,),
#merge_tools=False
)
## Same plotting cell as the i = 0 one, for taxon index i = 4
## (candidate for a shared helper function).
i = 4
group_name = tnames[i]
label_p = "Ecological Suitability for "
label_q = "Mixed latent Q(x) for "
complement_df = taxa_df[5]
presences_df = taxa_df[i]
#pines_pt = gTDF[gTDF.Abies > 0]
#complement_pt = taxa_df[1]
#### Change accordingly
width = 600
height = 650
sample_pt = gv.Points((complement_df.Longitude,complement_df.Latitude),group=group_name,label='Sample').opts(
fill_color = 'red',
line_color = 'grey',
line_width = 0.2,
line_alpha = 0.9,
fill_alpha = 0.6,
size = 2,
)
presence_pt = gv.Points((presences_df.Longitude,presences_df.Latitude),group=group_name,label='Presence').opts(
marker = 'x',
color = 'black')
obs = (sample_pt * presence_pt)
cmap = plt.cm.viridis
#cmap = plt.cm.RdBu
#cmap = plt.cm.Blues
P = gv.Polygons(taxa_results[i],vdims=['Pm','Qm'],group=group_name,label="Ecological Suitability P(x)")
Q = gv.Polygons(taxa_results[i],vdims=['Qm','Pm'],group=group_name,label="Mixing effect Q(x)")
latent_pres_fig = ((P * obs).relabel(label_p) + (Q * obs).relabel(label_q))
#latent_pres_fig = ((P).relabel(lm) + (Q ).relabel(lm))
#clim = (-4,4)
## NOTE(review): `clim` is assigned but the `clim=` option below is commented
## out, so the colour limits are NOT applied in this cell.
clim = (0,1)
latent_pres_fig.opts(
opts.Polygons(
line_width=0.001,
line_alpha = 0.2,
tools=['hover'],
width = width,
height = height,
colorbar = True,
#clim = clim,
colorbar_position = 'bottom',
alpha = 1.0,
cmap = cmap,),
#merge_tools=False
)
## Same plotting cell as the i = 0 one, for taxon index i = 5 (the
## Complement group). This is the only cell that actually applies the
## colour limits `clim = (0, 1)`.
i = 5
group_name = tnames[i]
label_p = "Ecological Suitability for "
label_q = "Mixed latent Q(x) for "
complement_df = taxa_df[5]
presences_df = taxa_df[i]
#pines_pt = gTDF[gTDF.Abies > 0]
#complement_pt = taxa_df[1]
#### Change accordingly
width = 600
height = 650
sample_pt = gv.Points((complement_df.Longitude,complement_df.Latitude),group=group_name,label='Sample').opts(
fill_color = 'red',
line_color = 'grey',
line_width = 0.2,
line_alpha = 0.9,
fill_alpha = 0.6,
size = 2,
)
presence_pt = gv.Points((presences_df.Longitude,presences_df.Latitude),group=group_name,label='Presence').opts(
marker = 'x',
color = 'black')
obs = (sample_pt * presence_pt)
cmap = plt.cm.viridis
#cmap = plt.cm.RdBu
P = gv.Polygons(taxa_results[i],vdims=['Pm','Qm'],group=group_name,label="Ecological Suitability P(x)")
Q = gv.Polygons(taxa_results[i],vdims=['Qm','Pm'],group=group_name,label="Mixing effect Q(x)")
latent_pres_fig = ((P * obs).relabel(label_p) + (Q * obs).relabel(label_q))
#latent_pres_fig = ((P).relabel(lm) + (Q ).relabel(lm))
clim = (0,1)
latent_pres_fig.opts(
opts.Polygons(
line_width=0.001,
line_alpha = 0.2,
tools=['hover'],
width = width,
height = height,
colorbar = True,
clim = clim,
colorbar_position = 'bottom',
alpha = 1.0,
cmap = cmap,),
#merge_tools=False
)
Depending on the model, generating a convergent posterior sample can take several hours. In that case it is recommended to save the model for later use. To do this, we can pickle the 'fit3' object and save it to disk. Afterwards, we can load the pickled object for further analysis, plotting and generating additional samples.
It is recommended to use the same machine both for generating the posterior sample (and compiled model) and for loading the pickled object. If the model is compiled on a computer with a different architecture (or operating system) than the one used to load the pickled object, the unpickling will fail.
import pickle
_file = "data/multispecies_model-changename.pkl"
with open(_file,"wb") as f:
# see that multispecies_model is the object model
%time pickle.dump([multispecies_model,fit3],f,protocol=-1)
with open(_file, "rb") as f:
data_list = pickle.load(f)
fit = data_dict['fit']
# fit = data_list[1]
/opt/conda/envs/biospytial/lib/python2.7/site-packages/pandas/core/indexes/base.py:162: FutureWarning: the 'labels' keyword is deprecated, use 'codes' instead return cls.__new__(cls, **d)
## Posterior summaries of the reloaded fit, one parameter group at a time.
for par in ('beta', 'alpha', 'alpha_car', 'tau'):
    print(fit.stansummary(pars=par))
Inference for Stan model: anon_model_c886f98cb6e47ecc6d69746055875c04.
5 chains, each with iter=10; warmup=5; thin=1;
post-warmup draws per chain=5, total post-warmup draws=25.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
beta[1,1] 0.22 nan 1.26 -1.0 -0.78 -0.55 1.61 1.83 nan inf
beta[2,1] 0.58 nan 1.1 -0.84 -0.11 0.17 1.68 1.98 nan inf
beta[3,1] 0.59 nan 1.2 -1.18 -0.38 1.11 1.52 1.88 nan inf
beta[4,1] 0.14 nan 0.85 -0.54 -0.49 -0.2 0.23 1.71 nan inf
beta[5,1] -0.91 nan 1.06 -1.92 -1.9 -1.16 -0.44 0.86 nan inf
beta[6,1] 0.88 nan 1.1 -0.93 0.48 0.93 1.95 1.99 nan inf
beta[1,2] -0.63 nan 0.93 -1.73 -1.47 -0.72 0.03 0.73 nan inf
beta[2,2] -0.26 nan 1.19 -1.61 -1.21 -0.67 0.82 1.38 nan inf
beta[3,2] 0.24 nan 1.0 -1.19 -0.02 0.01 0.57 1.84 nan inf
beta[4,2] -0.04 nan 1.12 -1.89 -0.48 0.09 0.86 1.24 nan inf
beta[5,2] -0.6 nan 0.7 -1.59 -0.9 -0.68 -0.33 0.49 nan inf
beta[6,2] -0.48 nan 0.37 -0.88 -0.8 -0.61 -0.09 -0.01 nan inf
beta[1,3] -0.33 nan 1.38 -1.86 -1.43 -0.83 0.73 1.73 nan inf
beta[2,3] 0.84 nan 0.88 -0.71 0.76 0.88 1.53 1.76 nan inf
beta[3,3] 0.2 nan 1.18 -1.99 0.37 0.47 0.69 1.45 nan inf
beta[4,3] -0.21 nan 1.14 -1.41 -1.02 -0.82 0.64 1.54 nan inf
beta[5,3] -0.48 nan 1.32 -1.89 -1.76 -0.41 0.01 1.64 nan inf
beta[6,3] -0.36 nan 0.63 -1.53 -0.38 -0.09 0.02 0.2 nan inf
Samples were drawn using NUTS at Thu Feb 13 18:55:59 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
Inference for Stan model: anon_model_c886f98cb6e47ecc6d69746055875c04.
5 chains, each with iter=10; warmup=5; thin=1;
post-warmup draws per chain=5, total post-warmup draws=25.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
alpha[1,1] 0.49 nan 0.26 0.15 0.29 0.51 0.66 0.85 nan inf
alpha[2,1] 0.4 nan 0.22 0.13 0.15 0.49 0.55 0.66 nan inf
alpha[3,1] 0.43 nan 0.16 0.13 0.38 0.53 0.54 0.56 nan inf
alpha[4,1] 0.58 nan 0.28 0.16 0.35 0.74 0.77 0.88 nan inf
alpha[5,1] 0.33 nan 0.09 0.24 0.27 0.28 0.4 0.48 nan inf
alpha[6,1] 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0 nan nan
alpha[1,2] 0.51 nan 0.26 0.15 0.34 0.49 0.71 0.85 nan inf
alpha[2,2] 0.6 nan 0.22 0.34 0.45 0.51 0.85 0.87 nan inf
alpha[3,2] 0.57 nan 0.16 0.44 0.46 0.47 0.62 0.87 nan inf
alpha[4,2] 0.42 nan 0.28 0.12 0.23 0.26 0.65 0.84 nan inf
alpha[5,2] 0.67 nan 0.09 0.52 0.6 0.72 0.73 0.76 nan inf
alpha[6,2] 1.0 nan 0.0 1.0 1.0 1.0 1.0 1.0 nan nan
Samples were drawn using NUTS at Thu Feb 13 18:55:59 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
Inference for Stan model: anon_model_c886f98cb6e47ecc6d69746055875c04.
5 chains, each with iter=10; warmup=5; thin=1;
post-warmup draws per chain=5, total post-warmup draws=25.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
alpha_car 0.53 nan 0.22 0.13 0.5 0.61 0.71 0.73 nan inf
Samples were drawn using NUTS at Thu Feb 13 18:55:59 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
Inference for Stan model: anon_model_c886f98cb6e47ecc6d69746055875c04.
5 chains, each with iter=10; warmup=5; thin=1;
post-warmup draws per chain=5, total post-warmup draws=25.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
tau 1.92 nan 1.26 0.15 1.56 1.9 2.0 4.0 nan inf
Samples were drawn using NUTS at Thu Feb 13 18:55:59 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).